Single cell RNA seq analysis of TB infection in murine lungs¶

Irene Jiang and Yizhou Yu

Original data: https://www.cell.com/cell-reports/pdf/S2211-1247(22)00769-0.pdf

Summary of the workflow:

  1. Import functions and curate data
  2. Visualisation
  3. Differential gene expression analysis

Preprocessing¶

In [254]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
from anndata import AnnData
import bbknn
import gseapy
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
from scipy.stats import t
from statsmodels.stats import multitest

https://www.cell.com/cell-reports/pdf/S2211-1247(22)00769-0.pdf

In [2]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.23.5 scipy==1.10.0 pandas==1.5.3 scikit-learn==1.2.1 statsmodels==0.13.5 python-igraph==0.10.6 pynndescent==0.5.10
In [3]:
results_file = 'single_cell_test_1.h5ad'  # the file that will store the analysis results
In [4]:
adata1 = sc.read_10x_mtx(
    r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\uninfected_rep1",  # the directory with the `.mtx` file
    var_names='gene_symbols',                # use gene symbols for the variable names (variables-axis index)
    cache=True)                              # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-uninfected_rep1-matrix.h5ad
In [5]:
adata2 = sc.read_10x_mtx(
    r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\uninfected_rep2",  # the directory with the `.mtx` file
    var_names='gene_symbols',                # use gene symbols for the variable names (variables-axis index)
    cache=True)                              # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-uninfected_rep2-matrix.h5ad
In [6]:
adata3 = sc.read_10x_mtx(
    r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep1",  # the directory with the `.mtx` file
    var_names='gene_symbols',                # use gene symbols for the variable names (variables-axis index)
    cache=True)                              # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep1-matrix.h5ad
In [7]:
adata4 = sc.read_10x_mtx(
    r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep2",  # the directory with the `.mtx` file
    var_names='gene_symbols',                # use gene symbols for the variable names (variables-axis index)
    cache=True)                              # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep2-matrix.h5ad
In [8]:
adata5 = sc.read_10x_mtx(
    r"C:\Users\irene\Tuberculosis_Research\RNA Sequencing Analysis\infected_d50_rep3",  # the directory with the `.mtx` file
    var_names='gene_symbols',                # use gene symbols for the variable names (variables-axis index)
    cache=True)                              # write a cache file for faster subsequent reading
... reading from cache file cache\C-Users-irene-Tuberculosis_Research-RNA Sequencing Analysis-infected_d50_rep3-matrix.h5ad
In [9]:
adata3.obs['condition'] = "infected"
adata3.obs['replicate'] = "1"
adata4.obs['condition'] = "infected"
adata4.obs['replicate'] = "2"
adata5.obs['condition'] = "infected"
adata5.obs['replicate'] = "3"
In [10]:
adata1.obs['condition'] = "uninfected"
adata1.obs['replicate'] = "1"
adata2.obs['condition'] = "uninfected"
adata2.obs['replicate'] = "2"
In [11]:
adatas = [adata1, adata2,adata3, adata4, adata5]
adata = anndata.concat(adatas)
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
In [12]:
adata.var = adata1.var
In [13]:
adata.var_names_make_unique()  # this is unnecessary if using `var_names='gene_ids'` in `sc.read_10x_mtx`
In [14]:
adata
Out[14]:
AnnData object with n_obs × n_vars = 25227 × 27998
    obs: 'condition', 'replicate'
    var: 'gene_ids', 'feature_types'
In [15]:
sc.pl.highest_expr_genes(adata, n_top=20, )
normalizing counts per cell
    finished (0:00:00)
In [16]:
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
filtered out 1267 cells that have less than 200 genes expressed
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
filtered out 11214 genes that are detected in less than 3 cells
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
In [17]:
adata.var['mt'] = adata.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
In [18]:
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)
In [19]:
sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
In [20]:
adata = adata[adata.obs.n_genes_by_counts < 4000, :]
adata = adata[adata.obs.pct_counts_mt < 20, :]
In [21]:
sc.pp.normalize_total(adata, target_sum=1e4)
normalizing counts per cell
C:\Users\irene\anaconda3\lib\site-packages\scanpy\preprocessing\_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
    finished (0:00:00)
In [22]:
sc.pp.log1p(adata)
In [23]:
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [24]:
sc.pl.highly_variable_genes(adata)
In [25]:
adata.raw = adata
In [26]:
adata = adata[:, adata.var.highly_variable]
In [27]:
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
regressing out ['total_counts', 'pct_counts_mt']
    sparse input is densified and may lead to high memory use
C:\Users\irene\anaconda3\lib\site-packages\anndata\_core\anndata.py:1830: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
  utils.warn_names_duplicates("obs")
    finished (0:00:40)
In [28]:
sc.pp.scale(adata, max_value=10)
In [29]:
sc.tl.pca(adata, svd_solver='arpack')
computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:02)
In [30]:
sc.pl.pca(adata, color = 'condition')
#coloring by expression level
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [32]:
sc.pl.pca_variance_ratio(adata, log=True)
In [33]:
adata.write(results_file)
In [34]:
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
computing neighbors
    using 'X_pca' with n_pcs = 40
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:27)
In [35]:
sc.tl.umap(adata)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:20)
In [38]:
sc.pl.umap(adata, color=['Malat1', 'Hbb-bs', 'Gm42418','condition'])
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

Cell type annotations¶

In [39]:
sc.tl.leiden(adata, resolution = 0.1, key_added = "leiden_0.1")

sc.pl.umap(adata, color=['leiden_0.1'])
running Leiden clustering
    finished: found 10 clusters and added
    'leiden_0.1', the cluster labels (adata.obs, categorical) (0:00:01)
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [40]:
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='t-test')
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:09)
In [51]:
adata.write(results_file)
In [90]:
b_cells = ['Cd19','Cd79a','Ms4a1']
cd4_t_cells = ['Cd4', 'Cd3e']
cd8_t_cells = ['Cd8a', 'Cd3e']
nk_cells = ['Nkg7','Klrd1','Tyrobp']
active_t_cells = ['Cd3e', 'Ccl5', 'Cx3cr1']
endothelial_cells = ['Egfl7', 'Epas1', 'Ramp2']
In [91]:
sc.pl.umap(adata,color = b_cells, ncols = 3)
sc.pl.umap(adata,color = cd4_t_cells, ncols = 3)
sc.pl.umap(adata,color = cd8_t_cells, ncols = 3)
sc.pl.umap(adata,color = nk_cells, ncols = 3)
sc.pl.umap(adata,color = active_t_cells, ncols = 3)
sc.pl.umap(adata,color = endothelial_cells, ncols = 3)
In [92]:
new_cluster_names = [
    'B cells',
    'CD4 T cells', 'CD8 T cells',
    'NK cells','Active T cells','Endothelial cells','outliers1','outlier2', 'outlier3', 'outlier4']
adata.rename_categories('leiden_0.1', new_cluster_names)
In [93]:
adata.obs
Out[93]:
condition replicate n_genes n_genes_by_counts total_counts total_counts_mt pct_counts_mt leiden_0.1
AAACCTGAGCAATATG-1 uninfected 1 1182 1182 1955.0 15.0 0.767263 Endothelial cells
AAACCTGAGCATCATC-1 uninfected 1 1916 1916 6060.0 224.0 3.696370 CD4 T cells
AAACCTGCAAGTACCT-1 uninfected 1 1740 1740 3536.0 60.0 1.696833 NK cells
AAACCTGCAGAGTGTG-1 uninfected 1 1301 1301 3279.0 106.0 3.232693 B cells
AAACCTGCAGATCGGA-1 uninfected 1 953 953 1815.0 61.0 3.360882 B cells
... ... ... ... ... ... ... ... ...
TTTGTCATCAGTTCGA-1 infected 3 1846 1846 6657.0 190.0 2.854138 CD8 T cells
TTTGTCATCCAAACAC-1 infected 3 1845 1845 4615.0 40.0 0.866739 NK cells
TTTGTCATCGTAGGTT-1 infected 3 1378 1378 2985.0 90.0 3.015075 NK cells
TTTGTCATCTGCTGTC-1 infected 3 1569 1569 3451.0 60.0 1.738626 NK cells
TTTGTCATCTTGGGTA-1 infected 3 1484 1484 3931.0 104.0 2.645637 CD4 T cells

23823 rows × 8 columns

In [94]:
sc.pl.umap(adata, color='leiden_0.1', legend_loc='on data', title='', frameon=False, save='.pdf')
WARNING: saving figure to file figures\umap.pdf
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [238]:
sc.pl.umap(adata, color='condition', title='', frameon=False, save='_condition.pdf')
WARNING: saving figure to file figures\umap_condition.pdf
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [95]:
marker_genes = b_cells + cd4_t_cells + cd8_t_cells + nk_cells + active_t_cells + endothelial_cells
sc.pl.dotplot(adata, marker_genes, groupby='leiden_0.1');
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
In [97]:
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='wilcoxon', key_added="wilcoxon")
sc.tl.rank_genes_groups(adata, 'leiden_0.1', method='t-test', key_added="t-test")
ranking genes
    finished: added to `.uns['wilcoxon']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:28)
ranking genes
    finished: added to `.uns['t-test']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:02)
In [105]:
#compare cluster1 genes, only stores top 100 by default

wc = sc.get.rank_genes_groups_df(adata, group='NK cells', key='wilcoxon', pval_cutoff=0.01, log2fc_min=0)
tt = sc.get.rank_genes_groups_df(adata, group='NK cells', key='t-test', pval_cutoff=0.01, log2fc_min=0)
tt.head()
Out[105]:
names scores logfoldchanges pvals pvals_adj
0 Fcer1g 241.670776 7.981099 0.0 0.0
1 Gzma 240.246628 9.004680 0.0 0.0
2 Tyrobp 224.478134 7.450509 0.0 0.0
3 Ccl5 199.512817 7.259755 0.0 0.0
4 Ncr1 187.117294 9.758260 0.0 0.0

Differential gene expression analysis of cell clusters¶

In [161]:
target_genes = ['Usp24', 'Ccdc122', 'Pdp1', 'Glrx5', 'Slc2a8', 'Lamp1','Zfp575','Klk10', 'Pinlyp']
sc.pl.stacked_violin(adata, target_genes, groupby = 'leiden_0.1')

Cell types: new_cluster_names = [ 'B cells', 'CD4 T cells', 'CD8 T cells', 'NK cells','Active T cells','Endothelial cells','outliers1','outlier2', 'outlier3', 'outlier4']

In [251]:
inf_group
Out[251]:
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)
In [281]:
df = sc.get.obs_df(adata, target_genes+ ['leiden_0.1','condition'], use_raw=True)

def yy_ci (x1,x2,m1,m2):
    n1 = x1.size
    n2 = x2.size
    
    v1 = np.var(x1)
    v2 = np.var(x2)
    
    d = m1/m2
    se = np.sqrt(v1/n1 + v2/n2)
    df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
    lb = d - t.ppf(0.975,df) * se
    ub = d + t.ppf(0.975,df) * se
    return lb,ub
    
dge_target = pd.DataFrame()
for celltype in new_cluster_names:
    cells = df[df['leiden_0.1'] == celltype]
    for gene_interest in target_genes:
        inf_group = cells[cells['condition'] == 'infected'][gene_interest].values
        control_group = cells[cells['condition'] == 'uninfected'][gene_interest].values
        u,p = mannwhitneyu(inf_group,control_group)
        inf_mean = np.expm1(np.mean(np.log1p(inf_group))) 
        control_mean = np.expm1(np.mean(np.log1p(control_group)))
        fc = inf_mean / control_mean
        lb,ub = yy_ci(inf_group,control_group,inf_mean,control_mean)
        dt_out = {'mann U':[u],'fc':[fc],'pval':[p],'cell type':[celltype],"gene":[gene_interest],
                 'lb':[lb],'ub':[ub]}
        dt_out = pd.DataFrame(dt_out)
        dge_target = pd.concat([dge_target,dt_out])
dge_target['adj_p'] = multitest.multipletests(dge_target['pval'])[1]
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: divide by zero encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: divide by zero encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:26: RuntimeWarning: invalid value encountered in float_scalars
  fc = inf_mean / control_mean
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:10: RuntimeWarning: invalid value encountered in float_scalars
  d = m1/m2
C:\Users\irene\AppData\Local\Temp\ipykernel_13740\3771354784.py:12: RuntimeWarning: invalid value encountered in double_scalars
  df = (v1/n1 + v2/n2)**2/(v1**2/(n1**2 * (n1 -1 )) + v2**2 / (n2**2 * (n2-1)))
C:\Users\irene\anaconda3\lib\site-packages\statsmodels\stats\multitest.py:177: RuntimeWarning: divide by zero encountered in log1p
  np.log1p(-pvals))
In [282]:
dge_target[dge_target['pval'] < 0.05]
Out[282]:
mann U fc pval cell type gene lb ub adj_p
0 5824602.0 1.848775 2.704393e-03 B cells Ccdc122 1.841637 1.855914 2.034591e-01
0 5547269.5 0.281678 1.484215e-19 B cells Glrx5 0.268484 0.294872 1.320952e-17
0 2853545.5 0.285144 1.739346e-20 CD4 T cells Glrx5 0.269105 0.301184 1.565411e-18
0 3060867.0 1.780395 3.261525e-03 CD4 T cells Slc2a8 1.770395 1.790395 2.374971e-01
0 2499920.0 1.416703 2.322545e-02 CD8 T cells Pdp1 1.405883 1.427524 8.509469e-01
0 2346440.0 0.387782 1.474287e-12 CD8 T cells Glrx5 0.372499 0.403066 1.282630e-10
0 1654812.5 0.256565 1.764245e-18 NK cells Glrx5 0.237034 0.276096 1.552536e-16
0 1718311.5 0.787590 3.090018e-02 NK cells Lamp1 0.760010 0.815171 9.188141e-01
0 129079.0 0.216788 3.292066e-08 Active T cells Glrx5 0.165980 0.267596 2.831173e-06
0 9111.0 0.176376 7.668835e-04 outliers1 Glrx5 0.097916 0.254835 6.312940e-02
0 1555.0 0.182470 3.356609e-03 outlier2 Glrx5 0.070642 0.294299 2.409631e-01
In [283]:
dge_target[dge_target['adj_p'] < 0.05]
Out[283]:
mann U fc pval cell type gene lb ub adj_p
0 5547269.5 0.281678 1.484215e-19 B cells Glrx5 0.268484 0.294872 1.320952e-17
0 2853545.5 0.285144 1.739346e-20 CD4 T cells Glrx5 0.269105 0.301184 1.565411e-18
0 2346440.0 0.387782 1.474287e-12 CD8 T cells Glrx5 0.372499 0.403066 1.282630e-10
0 1654812.5 0.256565 1.764245e-18 NK cells Glrx5 0.237034 0.276096 1.552536e-16
0 129079.0 0.216788 3.292066e-08 Active T cells Glrx5 0.165980 0.267596 2.831173e-06
In [339]:
fig, ax = plt.subplots(1,1)

data=[[0.28,1.32e-17,"B cells"],
      [0.29,1.56e-18,"CD4 T cells"],
      [0.39,1.28e-10,"CD8 T cells"],
      [0.26,1.55e-16,"NK cells"],
      [0.22,2.83e-06,"Active T cells"]]
column_labels=["Fold Change", "Adj Pval", "Cell Type"]

#creating a 2-dimensional dataframe out of the given data
df=pd.DataFrame(data,columns=column_labels)

ax.axis('tight') #turns off the axis lines and labels
ax.axis('off') #changes x and y axis limits such that all data is shown

table = ax.table(cellText=df.values,
        colLabels=df.columns,
        colColours =["lightcoral"] * 3,
        loc="center")
table.set_fontsize(14)
table.scale(1,2)
plt.show()
In [284]:
dge_pval = dge_target[dge_target['pval'] < 0.05]
In [328]:
fig, ax = plt.subplots(nrows=1, sharex=True, sharey=True, figsize=(4, 2), dpi=500)

dge_pval['label'] = dge_pval['gene'].astype(str) + ' in ' + dge_pval['cell type'].astype(str)

dge_pval = dge_pval.sort_values(by=['fc'])

for idx, row in dge_pval.iterrows():
    ci =  row['ub'] - (row['lb'] + row['ub'])/2    
    if row['adj_p'] < 0.05:
        plt.errorbar(x=[row['fc']], y=[row['label']], xerr=ci,
            ecolor='tab:red', capsize=2.5, linestyle='None', linewidth=0.75, marker="o", 
                     markersize=2.5, mfc="tab:red", mec="tab:red")
    else:
        plt.errorbar(x=[row['fc']], y=[row['label']], xerr=ci,
            ecolor='tab:gray', capsize=2.5, linestyle='None', linewidth=0.75, marker="o", 
                     markersize=2, mfc="tab:gray", mec="tab:gray")
        
ax.tick_params(axis='x', labelsize=5)
ax.tick_params(axis='y', labelsize=4)
#plt.tick_params(axis='both', which='major', labelsize=8)
plt.xlabel('Fold change and 95% Confidence Interval', fontsize=6)
plt.tight_layout()
plt.savefig('forest_plot.png')
plt.show()
In [249]:
genes1 = sc.get.rank_genes_groups_df(cl1_sub, group='infected', key='wilcoxon')['names'][:20]
genes2 = sc.get.rank_genes_groups_df(cl1_sub, group='uninfected', key='wilcoxon')['names'][:20]
genes = genes1.tolist() +  genes2.tolist() 

sc.pl.dotplot(cl1_sub,genes, groupby='condition')
C:\Users\irene\anaconda3\lib\site-packages\scanpy\plotting\_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
In [241]:
#Available databases : ‘Human’, ‘Mouse’, ‘Yeast’, ‘Fly’, ‘Fish’, ‘Worm’ 
gene_set_names = gseapy.get_library_name(organism='Human')
print(gene_set_names)
['ARCHS4_Cell-lines', 'ARCHS4_IDG_Coexp', 'ARCHS4_Kinases_Coexp', 'ARCHS4_TFs_Coexp', 'ARCHS4_Tissues', 'Achilles_fitness_decrease', 'Achilles_fitness_increase', 'Aging_Perturbations_from_GEO_down', 'Aging_Perturbations_from_GEO_up', 'Allen_Brain_Atlas_10x_scRNA_2021', 'Allen_Brain_Atlas_down', 'Allen_Brain_Atlas_up', 'Azimuth_Cell_Types_2021', 'BioCarta_2013', 'BioCarta_2015', 'BioCarta_2016', 'BioPlanet_2019', 'BioPlex_2017', 'CCLE_Proteomics_2020', 'CORUM', 'COVID-19_Related_Gene_Sets', 'COVID-19_Related_Gene_Sets_2021', 'Cancer_Cell_Line_Encyclopedia', 'CellMarker_Augmented_2021', 'ChEA_2013', 'ChEA_2015', 'ChEA_2016', 'ChEA_2022', 'Chromosome_Location', 'Chromosome_Location_hg19', 'ClinVar_2019', 'DSigDB', 'Data_Acquisition_Method_Most_Popular_Genes', 'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019', 'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019', 'Descartes_Cell_Types_and_Tissue_2021', 'Diabetes_Perturbations_GEO_2022', 'DisGeNET', 'Disease_Perturbations_from_GEO_down', 'Disease_Perturbations_from_GEO_up', 'Disease_Signatures_from_GEO_down_2014', 'Disease_Signatures_from_GEO_up_2014', 'DrugMatrix', 'Drug_Perturbations_from_GEO_2014', 'Drug_Perturbations_from_GEO_down', 'Drug_Perturbations_from_GEO_up', 'ENCODE_Histone_Modifications_2013', 'ENCODE_Histone_Modifications_2015', 'ENCODE_TF_ChIP-seq_2014', 'ENCODE_TF_ChIP-seq_2015', 'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X', 'ESCAPE', 'Elsevier_Pathway_Collection', 'Enrichr_Libraries_Most_Popular_Genes', 'Enrichr_Submissions_TF-Gene_Coocurrence', 'Enrichr_Users_Contributed_Lists_2020', 'Epigenomics_Roadmap_HM_ChIP-seq', 'FANTOM6_lncRNA_KD_DEGs', 'GO_Biological_Process_2013', 'GO_Biological_Process_2015', 'GO_Biological_Process_2017', 'GO_Biological_Process_2017b', 'GO_Biological_Process_2018', 'GO_Biological_Process_2021', 'GO_Biological_Process_2023', 'GO_Cellular_Component_2013', 'GO_Cellular_Component_2015', 'GO_Cellular_Component_2017', 'GO_Cellular_Component_2017b', 'GO_Cellular_Component_2018', 'GO_Cellular_Component_2021', 'GO_Cellular_Component_2023', 'GO_Molecular_Function_2013', 'GO_Molecular_Function_2015', 'GO_Molecular_Function_2017', 'GO_Molecular_Function_2017b', 'GO_Molecular_Function_2018', 'GO_Molecular_Function_2021', 'GO_Molecular_Function_2023', 'GTEx_Aging_Signatures_2021', 'GTEx_Tissue_Expression_Down', 'GTEx_Tissue_Expression_Up', 'GTEx_Tissues_V8_2023', 'GWAS_Catalog_2019', 'GWAS_Catalog_2023', 'GeDiPNet_2023', 'GeneSigDB', 'Gene_Perturbations_from_GEO_down', 'Gene_Perturbations_from_GEO_up', 'Genes_Associated_with_NIH_Grants', 'Genome_Browser_PWMs', 'GlyGen_Glycosylated_Proteins_2022', 'HDSigDB_Human_2021', 'HDSigDB_Mouse_2021', 'HMDB_Metabolites', 'HMS_LINCS_KinomeScan', 'HomoloGene', 'HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression', 'HuBMAP_ASCTplusB_augmented_2022', 'HumanCyc_2015', 'HumanCyc_2016', 'Human_Gene_Atlas', 'Human_Phenotype_Ontology', 'IDG_Drug_Targets_2022', 'InterPro_Domains_2019', 'Jensen_COMPARTMENTS', 'Jensen_DISEASES', 'Jensen_TISSUES', 'KEA_2013', 'KEA_2015', 'KEGG_2013', 'KEGG_2015', 'KEGG_2016', 'KEGG_2019_Human', 'KEGG_2019_Mouse', 'KEGG_2021_Human', 'KOMP2_Mouse_Phenotypes_2022', 'Kinase_Perturbations_from_GEO_down', 'Kinase_Perturbations_from_GEO_up', 'L1000_Kinase_and_GPCR_Perturbations_down', 'L1000_Kinase_and_GPCR_Perturbations_up', 'LINCS_L1000_CRISPR_KO_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_down', 'LINCS_L1000_Chem_Pert_up', 'LINCS_L1000_Ligand_Perturbations_down', 'LINCS_L1000_Ligand_Perturbations_up', 'Ligand_Perturbations_from_GEO_down', 'Ligand_Perturbations_from_GEO_up', 'MAGMA_Drugs_and_Diseases', 'MAGNET_2023', 'MCF7_Perturbations_from_GEO_down', 'MCF7_Perturbations_from_GEO_up', 'MGI_Mammalian_Phenotype_2013', 'MGI_Mammalian_Phenotype_2017', 'MGI_Mammalian_Phenotype_Level_3', 'MGI_Mammalian_Phenotype_Level_4', 'MGI_Mammalian_Phenotype_Level_4_2019', 'MGI_Mammalian_Phenotype_Level_4_2021', 'MSigDB_Computational', 'MSigDB_Hallmark_2020', 'MSigDB_Oncogenic_Signatures', 'Metabolomics_Workbench_Metabolites_2022', 'Microbe_Perturbations_from_GEO_down', 'Microbe_Perturbations_from_GEO_up', 'Mouse_Gene_Atlas', 'NCI-60_Cancer_Cell_Lines', 'NCI-Nature_2015', 'NCI-Nature_2016', 'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_Human_AutoRIF', 'NIH_Funded_PIs_2017_Human_GeneRIF', 'NURSA_Human_Endogenous_Complexome', 'OMIM_Disease', 'OMIM_Expanded', 'Old_CMAP_down', 'Old_CMAP_up', 'Orphanet_Augmented_2021', 'PFOCR_Pathways', 'PFOCR_Pathways_2023', 'PPI_Hub_Proteins', 'PanglaoDB_Augmented_2021', 'Panther_2015', 'Panther_2016', 'Pfam_Domains_2019', 'Pfam_InterPro_Domains', 'PheWeb_2019', 'PhenGenI_Association_2021', 'Phosphatase_Substrates_from_DEPOD', 'ProteomicsDB_2020', 'Proteomics_Drug_Atlas_2023', 'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO', 'RNAseq_Automatic_GEO_Signatures_Human_Down', 'RNAseq_Automatic_GEO_Signatures_Human_Up', 'RNAseq_Automatic_GEO_Signatures_Mouse_Down', 'RNAseq_Automatic_GEO_Signatures_Mouse_Up', 'Rare_Diseases_AutoRIF_ARCHS4_Predictions', 'Rare_Diseases_AutoRIF_Gene_Lists', 'Rare_Diseases_GeneRIF_ARCHS4_Predictions', 'Rare_Diseases_GeneRIF_Gene_Lists', 'Reactome_2013', 'Reactome_2015', 'Reactome_2016', 'Reactome_2022', 'SILAC_Phosphoproteomics', 'SubCell_BarCode', 'SynGO_2022', 'SysMyo_Muscle_Gene_Sets', 'TF-LOF_Expression_from_GEO', 'TF_Perturbations_Followed_by_Expression', 'TG_GATES_2020', 'TRANSFAC_and_JASPAR_PWMs', 'TRRUST_Transcription_Factors_2019', 'Table_Mining_of_CRISPR_Studies', 'Tabula_Muris', 'Tabula_Sapiens', 'TargetScan_microRNA', 'TargetScan_microRNA_2017', 'The_Kinase_Library_2023', 'Tissue_Protein_Expression_from_Human_Proteome_Map', 'Tissue_Protein_Expression_from_ProteomicsDB', 'Transcription_Factor_PPIs', 'UK_Biobank_GWAS_v1', 'Virus-Host_PPI_P-HIPSTer_2020', 'VirusMINT', 'Virus_Perturbations_from_GEO_down', 'Virus_Perturbations_from_GEO_up', 'WikiPathway_2021_Human', 'WikiPathways_2013', 'WikiPathways_2015', 'WikiPathways_2016', 'WikiPathways_2019_Human', 'WikiPathways_2019_Mouse', 'dbGaP', 'huMAP', 'lncHUB_lncRNA_Co-Expression', 'miRTarBase_2017']
In [ ]: